{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import gym\n",
    "import matplotlib\n",
    "import numpy as np\n",
    "import sys\n",
    "\n",
    "from collections import defaultdict\n",
    "\n",
    "if \"../\" not in sys.path:\n",
    "  sys.path.append(\"../\") \n",
    "from lib.envs.blackjack import BlackjackEnv\n",
    "from lib import plotting\n",
    "\n",
    "matplotlib.style.use('ggplot')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "env = BlackjackEnv()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def mc_prediction(policy, env, num_episodes, discount_factor=1.0):\n",
    "    \"\"\"\n",
    "    Monte Carlo prediction algorithm. Calculates the value function\n",
    "    for a given policy using sampling.\n",
    "    \n",
    "    Args:\n",
    "        policy: A function that maps an observation to action probabilities.\n",
    "        env: OpenAI gym environment.\n",
    "        num_episodes: Number of episodes to sample.\n",
    "        discount_factor: Gamma discount factor.\n",
    "    \n",
    "    Returns:\n",
    "        A dictionary that maps from state -> value.\n",
    "        The state is a tuple and the value is a float.\n",
    "    \"\"\"\n",
    "\n",
    "    # Keeps track of sum and count of returns for each state\n",
    "    # to calculate an average. We could use an array to save all\n",
    "    # returns (like in the book) but that's memory inefficient.\n",
    "    returns_sum = defaultdict(float)\n",
    "    returns_count = defaultdict(float)\n",
    "    \n",
    "    # The final value function\n",
    "    V = defaultdict(float)\n",
    "    \n",
    "    # Implement this!\n",
    "\n",
    "    return V    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def sample_policy(observation):\n",
    "    \"\"\"\n",
    "    A policy that sticks if the player score is > 20 and hits otherwise.\n",
    "    \"\"\"\n",
    "    score, dealer_score, usable_ace = observation\n",
    "    return 0 if score >= 20 else 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "V_10k = mc_prediction(sample_policy, env, num_episodes=10000)\n",
    "plotting.plot_value_function(V_10k, title=\"10,000 Steps\")\n",
    "\n",
    "V_500k = mc_prediction(sample_policy, env, num_episodes=500000)\n",
    "plotting.plot_value_function(V_500k, title=\"500,000 Steps\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
